/*
Compute the earnings difference in the PSID between people who did and did not have married parents

Author: GA
Last Updated: 8/5/2019

*/

***********step 1: get parent IDs***********
clear all

//read in and format the PSID parent extract (both do-files live outside this script)
do "$dofiles/PSID/00_psid_parent_readin_v1"
do "$dofiles/PSID/00_psid_parent_format_v1"

//every identifier is composed as (first variable)*1000 + (second variable)
local scale 1000
generate uniqid = ER30001*`scale' + ER30002 //the individual
generate mom_id = ER32009*`scale' + ER32010 //the individual's mother
generate dad_id = ER32016*`scale' + ER32017 //the individual's father

//retain only the variables whose names end in "id", then store the crosswalk
keep *id
save "$temp/psid_parent_ids", replace

***********step 2: get marital histories***********
clear all //to deal with variable labels

//read in and format the PSID extract that carries the marital status variables
do "$dofiles/PSID/00_psid_fam_readin_v1"
do "$dofiles/PSID/00_psid_fam_format_v1"

gen uniqid = ER30001*1000 + ER30002 //unique identifier

//child birth date variables
ren ER32024 birth_year_1 //first
ren ER32028 birth_year_4 //fourth youngest
ren ER32030 birth_year_3 //third youngest
ren ER32032 birth_year_2 //second youngest
ren ER32026 birth_year_5 //youngest

***********big renaming loop; use variable labels
//Each variable whose label ends in a two-digit year and mentions marriage is
//renamed to marstYYYY so the data can be reshaped to person-year form below.
ds uniqid, not
foreach var in `r(varlist)'{
	
	//fetch label
	local lab: variable label `var'
	
	//the last word of the label is expected to be the two-digit survey year
	local num = word("`lab'", -1)
	
	//skip labels that do not end in exactly two digits (this also protects the
	//numeric comparison below from non-numeric text), and skip V4373, which
	//the original script excluded explicitly
	if !regexm("`num'", "^[0-9][0-9]$") | "`var'" == "V4373" {
		continue
	}	
	
	//two-digit year -> four-digit year. The first PSID wave is 1968, so 68-99
	//belong to the 1900s and 00-67 to the 2000s. Using a single pivot means
	//every value is mapped (the old "<20" / ">20" pair left 20 unmapped and
	//silently reused the previous iteration's year).
	local year = cond(`num' >= 68, 1900, 2000) + `num'
	
	****rename according to label contents
	//marital status
	if strpos("`lab'", "MARR") |  strpos("`lab'", "MARITAL"){
		ren `var' marst`year'
	}
}

keep uniqid birth* marst* //keep fertility and marriage dates
reshape long marst, i(uniqid) j(year)
drop birth*

//0 indicates non-married, 1 indicates married.
//Stata treats missing as larger than any number, so missing values must be
//excluded explicitly or they would be recoded to "married".
replace marst = 1 if marst>0 & !missing(marst)
save "$temp/psid_marst", replace

***********Step 3: Code Marital Histories***********
use "$temp/psid_marst", clear
merge m:1 uniqid using "$temp/psid_parent_ids", keep(match) nogen
merge 1:1 uniqid year using "$temp/psid_long", keep(match) nogen

//snapshot of (uniqid, year, marst) used to look up each parent's own status
tempfile parent_lookup
preserve
keep uniqid marst year
save `parent_lookup'
restore

//the individual's own marital status is not needed beyond this point
drop marst

//attach each parent's status: temporarily let the parent's ID play the role of
//uniqid, pull marst for that person-year, then put the original names back
tempvar own_id
foreach p in mom dad {
	ren uniqid `own_id'
	ren `p'_id uniqid
	merge m:1 uniqid year using `parent_lookup', keep(1 3) nogen
	ren uniqid `p'_id
	ren `own_id' uniqid
	ren marst `p'_marst
}

//parents both married?
//NOTE(review): a parent with no matching record has missing marst and is
//therefore coded 0 here, not missing - confirm this is intended
generate pmarst = (mom_marst == 1 & dad_marst == 1)

//coding of marriage variable: take the parents' status in the year the
//individual is 18 and spread it across all of that individual's rows
tempvar at18
generate `at18' = pmarst if age == 18
bysort uniqid: egen parents_marst = max(`at18')
count if parents_marst == .

//disabled fallback: if the individual has no age-18 observation, try the
//years in which they were 17, 16, ..., 13 instead
/*forval y = 1/5{
	replace `at18' = .
	replace `at18' = pmarst if age == 18 - `y'
	bys uniqid: egen parents_marst_temp = max(`at18')
	replace parents_marst = parents_marst_temp if parents_marst == . & parents_marst_temp!=. 
	drop parents_marst_temp
}
*/
count if parents_marst == . 
keep uniqid year parents_marst
save "$temp/psid_marst_hist", replace

***********step 4: add to cleaned psid data and evaluate***********
use "$temp/psid_long_cleaned", clear
ren statefips stfips
tostring stfips, replace //the merge on stfips below needs a string key
keep if age<=54 //limit age range (also drops rows with missing age)

//get skill prices
merge m:1 stfips using "$temp/state_skill_prices_final", keep(match) nogen

//normalize wages
replace wages = wages/skill_price_2 //convert to real terms to account for oversampling of different locations
su wages
//normalize by mean earnings (43,983.68 per the original author's note);
//r(mean) is used directly so no precision is lost through macro formatting
replace wages = wages/r(mean)
merge 1:1 uniqid year using "$temp/psid_marst_hist", keep(match) nogen

//restrict to the older age group (36-54) and collapse to one row per person
keep if age>=36
gen count = 1
collapse (first) parents_marst (sum) count (mean) wages, by(uniqid)
drop if count<6 //drop if we don't get enough years
drop count

drop if wages>6 //trims outliers; missing wages are dropped too since missing > 6
su wages

//Compare against explicit values: a bare "if parents_marst" is true for
//missing values as well, which would put individuals with unknown parental
//status into the married-parents group.
su wages if parents_marst == 1
su wages if parents_marst == 0



